ls_preprocessed <- preprocess_rna(path_rnaseq = 'rnaseq.RData', correct_batch = T, correct_gender = T)
print(ls_preprocessed$pbatch_bf)
print(ls_preprocessed$pgender_bf)
print(ls_preprocessed$pbatch_af)
print(ls_preprocessed$pgender_af)
DE_res <- DE_analysis(ls_preprocessed,
GeneBased=TRUE,
pDataBased=FALSE,
NewCondition=FALSE,
cond_nm='ENSG00000151012.9',
reference = 'low', # low, alive
correct_gender=TRUE,
extremes_only=TRUE)
## Unlist done
## Labeling done
## Filtering done
## factor levels were dropped which had no samples
## Design done
## factor levels were dropped which had no samples
## Warning: Column `gene`/`ENSEMBL` joining character vector and factor, coercing
## into character vector
## vsd symbols done
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 1526 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## DESeq done
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
##
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
## Warning: Column `gene`/`ENSEMBL` joining character vector and factor, coercing
## into character vector
## res symbols done
## list done
heatmap_200(DE_res$res_df, DE_res$vsd_mat_sym, DE_res$meta_data, DE_res$pData_rnaseq)
x <- DE_res$res_df %>%
arrange(desc(abs(log2FoldChange)))
rownames(x) <- make.names(x$symbol, unique = T)
k <- c('ENSG00000250033.1', 'ENSG00000151012.9')
x <- x[-which(x$gene %in%k),]
head(x, 10)
## baseMean log2FoldChange lfcSE stat pvalue
## CRP 1.313927 3.876456 0.3567685 -0.1290811 8.972935e-01
## NSG2 5.806703 3.500774 0.3464664 1.9145769 5.554648e-02
## TRPM5 71.511963 3.290770 0.3183576 2.7024817 6.882397e-03
## RP11.443P15.2 373.936082 3.170488 0.3612413 8.8377859 9.763416e-19
## SH2D6 39.161949 3.163835 0.2504549 2.7665551 5.665201e-03
## CALML3 2.700342 3.150567 0.3854255 -0.4306354 6.667335e-01
## AKR1C1 5980.093163 3.150479 0.3673433 8.7648879 1.869627e-18
## PRSS33 4.239273 3.119136 0.3679369 -0.4910499 6.233912e-01
## RP11.749H17.2 37.671876 3.056101 0.3930168 8.0926310 5.838955e-16
## SLC14A2 311.178239 2.926689 0.3715233 8.0581331 7.746842e-16
## padj gene symbol
## CRP NA ENSG00000132693.8 CRP
## NSG2 3.266530e-01 ENSG00000170091.6 NSG2
## TRPM5 1.110171e-01 ENSG00000070985.9 TRPM5
## RP11.443P15.2 1.183359e-14 ENSG00000171658.4 RP11-443P15.2
## SH2D6 9.989930e-02 ENSG00000152292.12 SH2D6
## CALML3 8.824002e-01 ENSG00000178363.3 CALML3
## AKR1C1 1.699537e-14 ENSG00000187134.8 AKR1C1
## PRSS33 8.635078e-01 ENSG00000103355.8 PRSS33
## RP11.749H17.2 4.246205e-12 ENSG00000267354.1 RP11-749H17.2
## SLC14A2 4.694716e-12 ENSG00000132874.9 SLC14A2
volcano_plot(x, gene=NULL, p_title='SLC7A11')
Low SLC7A11 is the reference. When SLC7A11 is high, pathways shown below are up- or down- regulated
fgsea_res <- fgsea_analysis(DE_res)
## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgsea(pathways = gmtPathways(pthw_path), stats = ranks, nperm = 1000): There are ties in the preranked stats (0.1% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fgsea_plot(fgsea_res$res_hm, pathways_title='Hallmark', condition_name='SLC7A11 low vs high')
fgsea_plot(fgsea_res$res_c1, pathways_title='C1 positional genes', condition_name='SLC7A11 low vs high')
fgsea_plot(fgsea_res$res_c2, pathways_title='C2 curated genes', condition_name='SLC7A11 low vs high')
fgsea_plot(fgsea_res$res_c3, pathways_title='C3 regulatory target genes', condition_name='SLC7A11 low vs high')
fgsea_plot(fgsea_res$res_c4, pathways_title='C4 cancer', condition_name='SLC7A11 low vs high')
fgsea_plot(fgsea_res$res_c5, pathways_title='C5 GO genes', condition_name='SLC7A11 low vs high')
fgsea_plot(fgsea_res$res_c6, pathways_title='C6 oncogenic', condition_name='SLC7A11 low vs high')
fgsea_plot(fgsea_res$res_c7, pathways_title='C7 immunologic', condition_name='SLC7A11 low vs high')
fgsea_plot(fgsea_res$res_msg, pathways_title='All signatures', condition_name='SLC7A11 low vs high')